import scrapy

from ScrapyObject.spiders.utils.url_utils import *

'''
需要延时1
scrapy crawl ydai -o ydai.json
https://e.xintangshipin.icu/
'''


class YdaiSpider(scrapy.Spider):
    """Spider that crawls the site at ``prefix + website + suffix``.

    Each response is scanned for a video URL and for a media block
    (link, picture and ``<dt>`` text lines); items are built with the
    project helper ``get_video_item``. Every site-relative ``.html`` link
    found in the page is then requested and fed back into ``parse``.
    """

    name = "ydai"
    # URL prefix (scheme plus leading host label)
    prefix = 'https://e.'
    # Middle part of the host name
    website = 'xintangshipin'
    # URL suffix (top-level domain and trailing slash)
    suffix = '.icu/'
    allowed_domains = [website + '.icu']
    start_urls = [prefix + website + suffix]

    def __init__(self, *args, **kwargs):
        """Initialise the spider and reset the running item counter.

        Positional and keyword arguments are forwarded to
        ``scrapy.Spider.__init__`` so that spider arguments supplied by the
        crawler (for example via ``-a key=value``) are accepted.
        """
        super().__init__(*args, **kwargs)
        # Running counter used as the ``id`` of every yielded item.
        self.i = 0

    def parse(self, response):
        """Yield items found in ``response`` and requests for further pages.

        Args:
            response: The response handed to this callback by Scrapy.

        Yields:
            Items returned by ``get_video_item`` and ``scrapy.Request``
            objects whose callback is this method.
        """
        base_url = self.prefix + self.website + self.suffix

        content = get_data(response)
        video_url = get_video_url_one(content)
        if video_url:
            self.i += 1
            yield get_video_item(
                id=self.i,
                url=response.url,
                vUrl=format_url_one(video_url[0]),
            )

        urls = response.xpath("//div[@class='media']//ul//li//a/@ href").extract()
        picture_urls = response.xpath("//div[@class='media']//ul//li//a/img/@ src").extract()
        info_lines = response.xpath("//div[@class='media']//ul//li//dt/text()").extract()
        if urls and picture_urls and info_lines:
            names = ""
            tags = ""
            # Each line looks like "<label>：<value>"; keep the text after the
            # last full-width colon. A later matching line overrides an
            # earlier one.
            for line in info_lines:
                if "片名：" in line:
                    names = line.split("：")[-1].strip()
                elif "类型：" in line:
                    tags = line.split("：")[-1].strip()
            self.i += 1
            yield get_video_item(
                id=self.i,
                tags=tags,
                url=split_joint(base_url, urls[0]),
                name=names,
                # Drop any query string from the picture URL.
                pUrl=picture_urls[0].split("?")[0],
            )

        # Follow every site-relative link that points at an .html page.
        for link in get_url(content):
            if link.startswith('/') and link.endswith('.html'):
                yield scrapy.Request(split_joint(base_url, link), callback=self.parse)
